TODO
s_1ha_qres <- qresid(s_1ha)
par(mfrow = c(2,2))
#histogram
plot(density(s_1ha_qres))
#QQ plot
qqnorm(s_1ha_qres); qqline(s_1ha_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(s_1ha), residuals(s_1ha, type = "response"))
k.check(s_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 2.8566447807 0.9598875 0.95
## s(plot) 4 0.0001271343 NA NA
## s(spei_history,L) 70 4.9668619782 NA NA
Basis dimension checking with gam.check() doesn’t appear to work for dlnm crossbasis smooths. Instead I’ll use a method described in the help file ?choose.k to check for adequate knots. Unfortunately, bs = "cs" doesn’t work with dlnm, so I’ll use select = TRUE instead to reduce chance of overfitting.
check_res_edf(s_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 15.6
The crossbasis smooth possibly needs more knots.
# plot_lag_slice(s_1ha, "spei_history", lag = 0) + labs(title = "surv, 1ha")
Here you can see how the CIs hardly overlap 0, but it could be an artifact of the line not being allowed to be wiggly enough.
summary(s_1ha)
##
## Family: binomial
## Link function: logit
##
## Formula:
## surv ~ flwr_prev + s(log_size_prev, bs = "cr") + s(plot, bs = "re") +
## s(spei_history, L, bs = "cb", k = c(3, 35), xt = list(bs = "cr"))
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.17262 0.07856 40.386 <2e-16 ***
## flwr_prev1 -0.51788 0.44094 -1.174 0.24
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 2.8566448 9 359.56 <2e-16 ***
## s(plot) 0.0001271 3 0.00 0.507
## s(spei_history,L) 4.9668620 27 57.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0599 Deviance explained = 13.2%
## fREML = 12516 Scale est. = 1 n = 9183
draw(s_1ha)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
s_cf_qres <- qresid(s_cf)
par(mfrow = c(2,2))
#histogram
plot(density(s_cf_qres))
#QQ plot
qqnorm(s_cf_qres); qqline(s_cf_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(s_cf), residuals(s_cf, type = "response"))
k.check(s_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 3.455151 0.9094799 0.185
## s(plot) 6 4.404063 NA NA
## s(spei_history,L) 70 11.101496 NA NA
# looking for near zero edf
check_res_edf(s_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 15.8
summary(s_cf)
##
## Family: binomial
## Link function: logit
##
## Formula:
## surv ~ flwr_prev + s(log_size_prev, bs = "cr") + s(plot, bs = "re") +
## s(spei_history, L, bs = "cb", k = c(3, 35), xt = list(bs = "cr"))
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.41882 0.14056 24.323 <2e-16 ***
## flwr_prev1 0.09828 0.26873 0.366 0.715
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 3.455 9 1976.65 <2e-16 ***
## s(plot) 4.404 5 42.83 <2e-16 ***
## s(spei_history,L) 11.101 20 180.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.113 Deviance explained = 20%
## fREML = 43593 Scale est. = 1 n = 31701
draw(s_cf)
## Warning: Removed 939 rows containing non-finite values (stat_contour).
Here I use a random sub-sample to check that differences between continuous forest and fragments are not purely due to sample size differences, particularly differences in the complexity of the crossbasis smooth.
summary(s_1ha)$edf[3]
## [1] 4.966862
summary(s_cf)$edf[3]
## [1] 11.1015
summary(s_cf_sub)$edf[3]
## [1] 1.930495
draw(s_cf_sub)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Surface still looks different in a similar way though.
appraise(g_1ha)
k.check(g_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 4.185915 0.9818111 0.105
## s(plot) 4 2.704746 NA NA
## s(spei_history,L) 70 18.385338 NA NA
check_res_edf(g_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
summary(g_1ha)
##
## Family: Scaled t(4.727,0.477)
## Link function: identity
##
## Formula:
## log_size ~ flwr_prev + s(log_size_prev, bs = "cr") + s(plot,
## bs = "re") + s(spei_history, L, bs = "cb", k = c(3, 35),
## xt = list(bs = "cr"))
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.37803 0.10625 41.205 <2e-16 ***
## flwr_prev1 0.08474 0.03600 2.354 0.0186 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(log_size_prev) 4.186 9 2943.17 < 2e-16 ***
## s(plot) 2.705 3 20.38 < 2e-16 ***
## s(spei_history,L) 18.385 21 23.37 8.05e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.696 Deviance explained = 63.2%
## fREML = 12197 Scale est. = 1 n = 8527
draw(g_1ha)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
gratia::appraise(g_cf)
k.check(g_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 5.719313 0.972089 0.0525
## s(plot) 6 4.076761 NA NA
## s(spei_history,L) 70 13.644518 NA NA
check_res_edf(g_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 14.8
# plot_lag_slice(g_1ha, "spei_history", lag = 33)
hmm… CIs are super narrow here, but effect size is tiny
summary(g_cf)
##
## Family: Scaled t(3.903,0.416)
## Link function: identity
##
## Formula:
## log_size ~ flwr_prev + s(log_size_prev, bs = "cr") + s(plot,
## bs = "re") + s(spei_history, L, bs = "cb", k = c(3, 35),
## xt = list(bs = "cr"))
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.35421 0.02597 167.665 <2e-16 ***
## flwr_prev1 0.02795 0.01482 1.886 0.0594 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(log_size_prev) 5.719 9 17697.579 <2e-16 ***
## s(plot) 4.077 5 6.703 <2e-16 ***
## s(spei_history,L) 13.645 19 101.657 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.784 Deviance explained = 69.1%
## fREML = 41018 Scale est. = 1 n = 28849
draw(g_cf)
## Warning: Removed 968 rows containing non-finite values (stat_contour).
To check that differences are not purely due to sample size differences, particularly that lower edf in continuous forests is due to higher sample size.
summary(g_1ha)$edf[3]
## [1] 18.38534
summary(g_cf)$edf[3]
## [1] 13.64452
summary(g_cf_sub)$edf[3]
## [1] 12.33543
edf of subsample is similar to full dataset, despite differences in sample size.
draw(g_cf_sub)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Crossbasis surface looks nearly identical.
f_1ha_qres <- qresid(f_1ha)
par(mfrow = c(2,2))
#histogram
plot(density(f_1ha_qres))
#QQ plot
qqnorm(f_1ha_qres); qqline(f_1ha_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(f_1ha), residuals(f_1ha, type = "response"))
k.check(f_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 3.318396 0.9891395 0.92
## s(plot) 4 2.505335 NA NA
## s(spei_history,L) 70 9.758276 NA NA
## s(ha_id_number) 1266 67.206082 NA NA
# looking for near zero edf
check_res_edf(f_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 18.2
summary(f_1ha)
##
## Family: binomial
## Link function: logit
##
## Formula:
## flwr ~ flwr_prev + s(log_size_prev, bs = "cr") + s(plot, bs = "re") +
## s(spei_history, L, bs = "cb", k = c(3, 35), xt = list(bs = "cr")) +
## s(ha_id_number, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.9700 0.5369 -11.119 < 2e-16 ***
## flwr_prev1 0.7084 0.1739 4.075 4.6e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 3.318 9 364.88 < 2e-16 ***
## s(plot) 2.505 3 20.39 0.00108 **
## s(spei_history,L) 9.758 21 88.71 < 2e-16 ***
## s(ha_id_number) 67.206 1265 85.76 0.00092 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.29 Deviance explained = 44.5%
## fREML = 10585 Scale est. = 1 n = 8527
draw(f_1ha)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
f_cf_qres <- qresid(f_cf)
par(mfrow = c(2,2))
#histogram
plot(density(f_cf_qres))
#QQ plot
qqnorm(f_cf_qres); qqline(f_cf_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(f_cf), residuals(f_cf, type = "response"))
k.check(f_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 5.698295 0.9701696 0.72
## s(plot) 6 3.899532 NA NA
## s(spei_history,L) 70 10.325026 NA NA
## s(ha_id_number) 4390 351.686745 NA NA
# looking for near zero edf
check_res_edf(f_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 4.1
summary(f_cf)
##
## Family: binomial
## Link function: logit
##
## Formula:
## flwr ~ flwr_prev + s(log_size_prev, bs = "cr") + s(plot, bs = "re") +
## s(spei_history, L, bs = "cb", k = c(3, 35), xt = list(bs = "cr")) +
## s(ha_id_number, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.39245 0.24028 -22.442 < 2e-16 ***
## flwr_prev1 0.53988 0.08756 6.166 7.01e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 5.698 9 2104.21 <2e-16 ***
## s(plot) 3.900 5 69.33 <2e-16 ***
## s(spei_history,L) 10.325 21 425.95 <2e-16 ***
## s(ha_id_number) 351.687 4389 494.24 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.267 Deviance explained = 40.6%
## fREML = 36175 Scale est. = 1 n = 28849
draw(f_cf)
## Warning: Removed 968 rows containing non-finite values (stat_contour).
To check that differences are not purely due to sample size differences, particularly that lower edf in continuous forests is due to higher sample size. For flowering, edf is actually slightly higher in CF with a larger sample size. With subsample, edf are more similar.
summary(f_1ha)$edf[3]
## [1] 9.758276
summary(f_cf)$edf[3]
## [1] 10.32503
summary(f_cf_sub)$edf[3]
## [1] 7.419691
draw(f_cf_sub)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Crossbasis surface looks extremely similar.
Reproducibility receipt
## datetime
Sys.time()
## [1] "2021-08-17 18:26:34 EDT"
## repository
if(requireNamespace('git2r', quietly = TRUE)) {
git2r::repository()
} else {
c(
system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
system2("git", args = c("remote", "-v"), stdout = TRUE)
)
}
## Local: revisions /Users/scottericr/Documents/HeliconiaDemography
## Remote: revisions @ origin (https://github.com/BrunaLab/HeliconiaDemography.git)
## Head: [d40aa88] 2021-08-17: function to plot "slices" through 2d smooths
## session info
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices datasets utils
## [8] methods base
##
## other attached packages:
## [1] arm_1.11-2 lme4_1.1-27.1 Matrix_1.3-3 MASS_7.3-54
## [5] qqplotr_0.0.5 Hmisc_4.5-0 Formula_1.2-4 survival_3.2-11
## [9] lattice_0.20-44 readxl_1.3.1 colorspace_2.0-1 rmarkdown_2.7
## [13] statmod_1.4.36 latex2exp_0.5.0 gratia_0.6.0.9112 broom_0.7.6
## [17] patchwork_1.1.1 glue_1.4.2 bbmle_1.0.23.1 dlnm_2.4.5
## [21] mgcv_1.8-36 nlme_3.1-152 lubridate_1.7.10 janitor_2.1.0
## [25] tsModel_0.6 SPEI_1.7 lmomco_2.3.6 tsibble_1.0.1
## [29] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.5 purrr_0.3.4
## [33] readr_1.4.0 tidyr_1.1.3 tibble_3.1.1 ggplot2_3.3.5
## [37] tidyverse_1.3.1 here_1.0.1 tarchetypes_0.2.0 targets_0.4.2
## [41] dotenv_1.0.3 conflicted_1.0.4
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 igraph_1.2.6 splines_4.0.2
## [4] digest_0.6.27 htmltools_0.5.1.1 fansi_0.4.2
## [7] magrittr_2.0.1 checkmate_2.0.0 memoise_2.0.0
## [10] cluster_2.1.2 modelr_0.1.8 bdsmatrix_1.3-4
## [13] anytime_0.3.9 jpeg_0.1-8.1 rvest_1.0.0
## [16] haven_2.4.1 xfun_0.22 callr_3.7.0
## [19] crayon_1.4.1 jsonlite_1.7.2 gtable_0.3.0
## [22] DEoptimR_1.0-8 abind_1.4-5 scales_1.1.1
## [25] mvtnorm_1.1-1 DBI_1.1.1 Rcpp_1.0.6
## [28] isoband_0.2.4 htmlTable_2.1.0 foreign_0.8-81
## [31] htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2
## [34] ellipsis_0.3.2 pkgconfig_2.0.3 farver_2.1.0
## [37] nnet_7.3-16 sass_0.3.1 dbplyr_2.1.1
## [40] utf8_1.2.1 tidyselect_1.1.1 labeling_0.4.2
## [43] rlang_0.4.11 munsell_0.5.0 cellranger_1.1.0
## [46] tools_4.0.2 cachem_1.0.4 cli_2.5.0
## [49] generics_0.1.0 evaluate_0.14 fastmap_1.1.0
## [52] yaml_2.2.1 goftest_1.2-2 processx_3.5.2
## [55] knitr_1.33 fs_1.5.0 robustbase_0.93-7
## [58] mvnfast_0.2.5.1 xml2_1.3.2 compiler_4.0.2
## [61] rstudioapi_0.13 png_0.1-7 reprex_2.0.0
## [64] bslib_0.2.4 stringi_1.6.2 highr_0.9
## [67] ps_1.6.0 nloptr_1.2.2.2 vctrs_0.3.8
## [70] pillar_1.6.0 lifecycle_1.0.0 jquerylib_0.1.4
## [73] data.table_1.14.0 R6_2.5.0 latticeExtra_0.6-29
## [76] renv_0.13.2 gridExtra_2.3 Lmoments_1.3-1
## [79] codetools_0.2-18 boot_1.3-28 assertthat_0.2.1
## [82] rprojroot_2.0.2 withr_2.4.2 hms_1.1.0
## [85] grid_4.0.2 rpart_4.1-15 coda_0.19-4
## [88] minqa_1.2.4 snakecase_0.11.0 git2r_0.28.0
## [91] numDeriv_2016.8-1.1 base64enc_0.1-3